## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## [conflicted] Will prefer dplyr::filter over any other package.
## [conflicted] Will prefer dplyr::select over any other package.
## [conflicted] Will prefer dplyr::slice over any other package.
## [conflicted] Will prefer dplyr::rename over any other package.
## [conflicted] Will prefer dplyr::intersect over any other package.
mytheme = theme_bw(base_size = 10) +
theme(text = element_text(size=10, colour='black'),
title = element_text(size=10, colour='black'),
line = element_line(size=0.5),
axis.title = element_text(size=10, colour='black'),
axis.text = element_text(size=10, colour='black'),
axis.line = element_line(colour = "black"),
axis.ticks = element_line(size=0.5),
strip.background = element_blank(),
strip.text.y = element_blank(),
panel.grid = element_blank(),
panel.border = element_blank(),
legend.position = c(0.8, 0.8),
legend.text=element_text(size=10)) ## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Rows: 1137099 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (16): chr, strand, cell_line, tool, circ_id, circ_id_strand, count_group...
## dbl (7): start, end, BSJ_count, n_detected, n_db, estim_len_in, BSJ_count_m...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 1560 Columns: 48
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (27): chr, strand, circ_id, circ_id_strand, tool, cell_line, FWD_primer,...
## dbl (21): start, end, FWD_pos, FWD_length, REV_pos, REV_length, FWD_TM, REV_...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Rows: 29 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): tool, count_group
## dbl (16): nr_qPCR_total, nr_qPCR_fail, nr_qPCR_val, nr_RR_total, nr_RR_fail,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
val$tool = factor(val$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
cqval_longer = val %>% pivot_longer(cols = c(perc_qPCR_val, perc_RR_val, perc_amp_val),
names_to = 'val_type', values_to = 'perc') %>%
left_join(val %>% select(tool, count_group, nr_qPCR_total, nr_RR_total, nr_amp_total) %>%
rename(perc_qPCR_val = nr_qPCR_total, perc_RR_val = nr_RR_total, perc_amp_val = nr_amp_total) %>%
pivot_longer(cols = c(perc_qPCR_val, perc_RR_val, perc_amp_val), values_to = 'nr_in_group',
names_to = "val_type")) %>%
select(tool, count_group, val_type, perc, nr_in_group) %>%
mutate(margin = qnorm(0.975)*sqrt(perc*(1-perc)/nr_in_group),
CI_low = perc - margin,
CI_up = perc + margin) %>%
mutate(count_group == ifelse(count_group == "count < 5", 'BSJ count < 5', count_group),
count_group == ifelse(count_group == "count ≥ 5", 'BSJ count ≥ 5', count_group),
count_group == ifelse(count_group == "no_counts", 'no BSJ count', count_group)) ## Joining with `by = join_by(tool, count_group, val_type)`
val_longer$tool = factor(val_longer$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
val_longer$val_type = factor(val_longer$val_type, levels = c('perc_qPCR_val', 'perc_RR_val', 'perc_amp_val'))
val_longer %>%
filter(count_group == 'count < 5', val_type == "perc_amp_val") %>%
pull(nr_in_group) %>% quantile()## 0% 25% 50% 75% 100%
## 11 14 15 17 20
val_longer %>%
ggplot(aes(tool, perc, fill = count_group)) +
geom_bar(stat = 'identity') +
geom_errorbar(aes(ymin=CI_low, ymax=CI_up),
width=.3, color = 'grey45') +
facet_grid(scales = 'free_x', space = "free",
rows = vars(val_type), cols = vars(count_group)) +
scale_y_continuous(labels = scales::percent_format(), breaks = c(0,0.25, 0.5, 0.75, 1)) +
mytheme_discrete_x +
#theme(strip.text.y = element_text(size = 10), legend.position = "none") +
theme(legend.position = "none") +
scale_fill_manual(values = c('#00B9F2', '#E69F00' , '#999999')) +
xlab('') +
ylab('')upset
upset = list()
upset['qPCR_val'] = cq %>% filter(qPCR_val == "pass") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['RR_val'] = cq %>% filter(RR_val == "pass") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['amp_seq_val'] = cq %>% filter(amp_seq_val == "pass") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['qPCR_fail'] = cq %>% filter(qPCR_val == "fail") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['RR_fail'] = cq %>% filter(RR_val == "fail") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['amp_seq_fail'] = cq %>% filter(amp_seq_val == "fail") %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['RR_out_of_range'] = cq %>% filter(is.na(RR_val)) %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
upset['amp_seq_not_included'] = cq %>% filter(is.na(amp_seq_val)) %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% pull(id_cl) %>% unique() %>% list()
# get number y-axis to add to plot
cq %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% select(id_cl, qPCR_val) %>% unique() %>% count(qPCR_val)cq %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% select(id_cl, RR_val) %>% unique() %>% count(RR_val)cq %>% mutate(id_cl = paste(circ_id, cell_line, sep = "_")) %>% select(id_cl, amp_seq_val) %>% unique() %>% count(amp_seq_val)# pdf(file="../tmp_figures/figure_3B_upset_small.pdf", onefile=FALSE,
# width = 8, height = 2.5)
upset(fromList(upset),
order.by = "freq",
sets = c('qPCR_val', 'RR_val', 'amp_seq_val', 'qPCR_fail', 'RR_fail', 'amp_seq_fail', 'RR_out_of_range', 'amp_seq_not_included'),
mainbar.y.label = "number of circRNAs", sets.x.label = "number of circRNAs",
keep.order = TRUE,
number.angles = 30,
point.size = 2.5, line.size = 1)add total nr of circRNAs and calculate theoretical nr of TP
val_cl = val %>%
# use perc_compound_val
select(tool, count_group, perc_compound_val) %>%
# get the number of detected circRNAs for each cell line and tool, per count group
left_join(all_circ %>%
group_by(cell_line, tool, count_group) %>%
summarise(total_n_ut = n())) %>%
# calculate the theoretical nr of validated circ
mutate(theo_TP_all = perc_compound_val * total_n_ut)## `summarise()` has grouped output by 'cell_line', 'tool'. You can override using
## the `.groups` argument.
## Joining with `by = join_by(tool, count_group)`
val_cl$tool = factor(val_cl$tool, levels = c("circseq_cup", "CIRI2", "CIRIquant", "CircSplice", "find_circ", "CirComPara2", "CIRCexplorer3", "circtools", "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2", "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"))
val_cl %>%
#filter(cell_line == 'HLF') %>%
ggplot() +
geom_bar(aes(tool, total_n_ut), stat = "identity", fill = 'grey') +
geom_bar(aes(tool, theo_TP_all, fill = count_group), stat = "identity") +
facet_wrap(~cell_line+count_group, scales = 'free') +
#facet_wrap(~count_group, scales = 'free') +
#facet_grid(rows = vars(cell_line), cols = vars(count_group), scales = 'free', space = 'free') +
mytheme_discrete_x +
scale_fill_manual(values = c('#00B9F2', '#E69F00' , '#999999')) +
xlab('') +
ylab('') +
theme(legend.position = NULL) +
scale_y_continuous(labels = scales::comma_format()) +
theme(legend.position="none")